#!/usr/bin/python3

"""
This script reads the .freq files written by maline, converts them into bit
vectors denoting which system calls were used and copies that information
into a JSON file for later analysis.

The output data format is as follows:
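{"features": ["<syscall_name>", ...],
 "apps": {"<apk_name>.apk": {"vector": [0,0,0,1], "malicious": [0,1]}, ...}}

"features" lists the syscall names read from maline's data/i386-syscall.txt.
Each "vector" has one entry per feature: 1 if that syscall's frequency was
greater than zero, otherwise 0. "malicious" is [1,0] when a matching file
exists in malicious_apk/ and [0,1] otherwise. <apk_name> is the second
'-'-separated field of the .freq file name.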
"""

import glob
import os
import json
from configparser import ConfigParser

__author__='mwleeds'

def _read_frequency_bits(path, expected_count):
    """Return a 0/1 usage vector parsed from the second line of a .freq file.

    Args:
        path: Path of the .freq file to read.
        expected_count: Number of frequency values the line must contain
            (one per known syscall).

    Returns:
        A list of ints, 1 where the corresponding frequency is greater than
        zero and 0 otherwise.

    Raises:
        ValueError: If the file has fewer than two lines, the number of
            values differs from expected_count, or a value is not an integer.
    """
    with open(path) as freq_file:
        lines = freq_file.readlines()
    if len(lines) < 2:
        raise ValueError('{} has no frequency line'.format(path))
    # split() with no argument collapses runs of whitespace and ignores the
    # trailing newline, so a trailing space does not create a bogus token.
    counts = lines[1].split()
    if len(counts) != expected_count:
        # Raised explicitly (not asserted) so the check survives `python -O`.
        raise ValueError('{} has {} frequencies but {} syscalls are known'.format(
            path, len(counts), expected_count))
    return [1 if int(count) > 0 else 0 for count in counts]


def main():
    """Convert every all_freq/*.freq file into a syscall bit vector JSON file.

    Reads MALINE_DIR from the [AMA] section of config.ini in the current
    directory, loads the syscall names from <MALINE_DIR>/data/i386-syscall.txt,
    and writes app_syscall_vectors.json to the current directory. An app is
    labelled malicious ([1,0]) when malicious_apk/<apk_name>.apk exists and
    benign ([0,1]) otherwise.

    Raises:
        ValueError: If a .freq file is malformed (see _read_frequency_bits).
    """
    config = ConfigParser()
    config.read('config.ini')
    maline_dir = os.path.expanduser(config.get('AMA', 'MALINE_DIR'))

    # Build the path instead of chdir-ing into MALINE_DIR and back, so a
    # failed open cannot leave the process in the wrong working directory.
    syscall_list_path = os.path.join(maline_dir, 'data', 'i386-syscall.txt')
    with open(syscall_list_path) as syscall_file:
        # list of strings naming each syscall for the architecture
        all_syscalls = [line.strip() for line in syscall_file]

    # maps '<apk_name>.apk' to its syscall bit vector and malicious label
    all_apps = {}
    # Sorted so the order of the output does not depend on glob's ordering.
    for filename in sorted(glob.glob('all_freq/*.freq')):
        print('Processing ' + filename)
        # The APK name is the second '-'-separated field of the file name;
        # basename keeps directory components out of the split.
        apk_name = os.path.basename(filename).split('-')[1]
        malicious = os.path.isfile(os.path.join('malicious_apk', apk_name + '.apk'))
        frequency_bits = _read_frequency_bits(filename, len(all_syscalls))
        all_apps[apk_name + '.apk'] = {
            'vector': frequency_bits,
            'malicious': [1, 0] if malicious else [0, 1],
        }

    with open('app_syscall_vectors.json', 'w') as outfile:
        json.dump({'features': all_syscalls, 'apps': all_apps}, outfile)
    print('Wrote data on ' + str(len(all_syscalls)) + ' syscalls and ' + str(len(all_apps)) + ' apps to a file.')

# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__=='__main__':
    main()
